This R Notebook presents a process analysis of 746 completed, fully validated, and archived projects in the HOT Tasking Manager (HOT-TM). Process discovery was performed using bupaR, a suite of open-source R packages for business process data analysis.

# Import required libraries
suppressWarnings({
library(bupaverse)
library(reshape2)
library(gt)
library(scales)
library(readr)
library(dplyr)
library(magrittr)
library(ggplot2)
library(Hmisc)
library(stringr)
})

Read event data. The files containing project info (projects.csv) and project states (splits_invalidations.csv) are read.

event_log_df <- read.csv("splits_invalidations.csv", stringsAsFactors = FALSE, sep = ",")
projects <- read.csv("projects.csv", stringsAsFactors = FALSE, sep = ",")
event_log_df[c('projectId', 'task')] <- str_split_fixed(event_log_df$taskId, '_', 2)
projects$projectId <- as.character(projects$projectId)
event_log_df = merge(x = event_log_df, y = projects[,c("projectId","difficulty","priority")], by = "projectId")
event_log_df <- event_log_df %>%
  convert_timestamps(columns = c("start", "complete"), format = ymd_hms) %>%
  activitylog(case_id = "taskId", activity_id = "action", resource_id = "actionBy", timestamps = c("start", "complete"))
event_log_df <- event_log_df %>%  mutate(action_duration = difftime(as.POSIXct(`complete`), as.POSIXct(`start`), units = "mins"))

Project states

head(event_log_df)
# Log of 12 events consisting of:
2 traces 
5 cases 
6 instances of 2 activities 
1 resource 
Events occurred from 2021-12-01 04:45:26 until 2021-12-01 04:54:13 
 
# Variables were mapped as follows:
Case identifier:        taskId 
Activity identifier:        action 
Resource identifier:        actionBy 
Timestamps:     start, complete 

Project info

head(projects)

Summary of the eventlog

summary(event_log_df)
Number of events:  3702910
Number of cases:  312289
Number of traces:  19084
Number of distinct activities:  10
Average trace length:  11.85732

Start eventlog:  NA
End eventlog:  NA

  projectId            taskId             action            actionBy             start                           complete                        username        
 Length:1853074     Length:1853074     Length:1853074     Length:1853074     Min.   :2021-12-01 04:45:26.33   Min.   :2021-12-01 04:46:29.93   Length:1853074    
 Class :character   Class :character   Class :character   Class :character   1st Qu.:2022-06-24 17:40:18.63   1st Qu.:2022-06-24 13:23:49.07   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median :2022-11-10 14:41:44.95   Median :2022-11-10 10:05:05.82   Mode  :character  
                                                                             Mean   :2022-11-18 20:48:41.79   Mean   :2022-11-18 16:25:08.50                     
                                                                             3rd Qu.:2023-03-23 15:15:46.61   3rd Qu.:2023-03-23 15:42:18.42                     
                                                                             Max.   :2023-11-30 14:31:20.80   Max.   :2023-11-30 14:31:20.81                     
                                                                                                              NA's   :3238                                       
 mappingLevel           task            difficulty          priority             .order        action_duration  
 Length:1853074     Length:1853074     Length:1853074     Length:1853074     Min.   :      1   Length:1853074   
 Class :character   Class :character   Class :character   Class :character   1st Qu.: 463269   Class :difftime  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character   Median : 926538   Mode  :numeric   
                                                                             Mean   : 926538                    
                                                                             3rd Qu.:1389806                    
                                                                             Max.   :1853074                    
                                                                                                                

Absolute frequency of states in the eventlog.

event_log_df %>% activity_frequency("activity")

Activity presence shows in what percentage of cases an activity is present.

event_log_df %>% activity_presence()
tmp <- event_log_df  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 57.51616
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

event_log_df %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$difficulty == "EASY",] %>% activity_presence()
tmp <- event_log_df[event_log_df$difficulty == "EASY",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 59.63736
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$difficulty == "MODERATE",] %>% activity_presence()
tmp <- event_log_df[event_log_df$difficulty == "MODERATE",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 51.37316
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$difficulty == "CHALLENGING",] %>% activity_presence()
tmp <- event_log_df[event_log_df$difficulty == "CHALLENGING",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 21.10187
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$priority == "LOW",] %>% activity_presence()
tmp <- event_log_df[event_log_df$priority == "LOW",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 61.36931
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces =10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$priority == "MEDIUM",] %>% activity_presence()
tmp <- event_log_df[event_log_df$priority == "MEDIUM",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 57.85076
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$priority == "HIGH",] %>% activity_presence()
tmp <- event_log_df[event_log_df$priority == "HIGH",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 50.39496
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
event_log_df[event_log_df$priority == "URGENT",] %>% activity_presence()
tmp <- event_log_df[event_log_df$priority == "URGENT",]  %>% filter_trace_frequency(percentage = 0.85)
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING",] %>% group_by(taskId) %>% summarise(mapping_duration = sum(action_duration)) 
totalmapping
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION",] %>% group_by(taskId) %>% summarise(validation_duration = sum(action_duration)) 
totalvalidation
mappingvalidation = merge(totalmapping, totalvalidation, by='taskId', all=TRUE)
mappingvalidation[is.na(mappingvalidation)] = 0
mappingvalidation
mappingvalidation <- transform(mappingvalidation, morevalidation= ifelse(mapping_duration < validation_duration, 1, 0)) 
mean(mappingvalidation$morevalidation)*100
[1] 27.51878
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))

In the frequency process map, nodes represent the absolute number of activity instance executions and edges represent the absolute number of times source and target activities were executed directly following each other. To provide a clear process map, the event log was first filtered using filter_trace_frequency(). Setting percentage = 0.85 selects at least 85% of the cases, starting with those that have the highest frequency.

tmp <- event_log_df %>% filter_trace_frequency(percentage = 0.85)
tmp %>%  process_map(frequency("absolute"))

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.

event_log_df %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.

Organisation

Composition of the total number of contributors of the analysed projects according to their mapping level.

Breakdown of status execution frequency per mapping level.

---
title: "Process_Analysis"
output:
  pdf_document: default
  html_notebook: default
---
This R Notebook presents a process analysis of 746 completed, fully validated, and archived projects in the HOT Tasking Manager (HOT-TM). Process discovery was performed using bupaR, a suite of open-source R packages for business process data analysis.
```{r}
# Import required libraries
suppressWarnings({
library(bupaverse)
library(reshape2)
library(gt)
library(scales)
library(readr)
library(dplyr)
library(magrittr)
library(ggplot2)
library(Hmisc)
library(stringr)
})
```

Read event data
The files containing project info (projects.csv) and project states (splits_invalidations.csv) are read.
```{r}
# Read the raw task event records and the project attribute table.
event_log_df <- read.csv(file = "splits_invalidations.csv", sep = ",", stringsAsFactors = FALSE)
projects <- read.csv(file = "projects.csv", sep = ",", stringsAsFactors = FALSE)

# Split taskId at its first "_" into a project id and a task part.
event_log_df[c("projectId", "task")] <- str_split_fixed(event_log_df$taskId, "_", 2)

# The split yields character ids, so the join key in `projects` is cast to
# character as well before the (inner) merge on projectId.
projects$projectId <- as.character(projects$projectId)
event_log_df <- merge(
  x = event_log_df,
  y = projects[, c("projectId", "difficulty", "priority")],
  by = "projectId"
)

# Parse the timestamps, build the bupaR activity log, and add the duration of
# every row in minutes (complete - start).
event_log_df <- event_log_df %>%
  convert_timestamps(columns = c("start", "complete"), format = ymd_hms) %>%
  activitylog(
    case_id = "taskId",
    activity_id = "action",
    resource_id = "actionBy",
    timestamps = c("start", "complete")
  ) %>%
  mutate(action_duration = difftime(as.POSIXct(complete), as.POSIXct(start), units = "mins"))
```

Project states
```{r}
head(event_log_df)
```
Project info
```{r}
head(projects)
```
Summary of the eventlog 
```{r}
summary(event_log_df)
```

Absolute frequency of states in the eventlog.
```{r}
event_log_df %>% activity_frequency("activity")
```


Activity presence shows in what percentage of cases an activity is present.
```{r}
event_log_df %>% activity_presence()
```
```{r}
tmp <- event_log_df  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
event_log_df %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
``` 

```{r}
event_log_df[event_log_df$difficulty == "EASY",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$difficulty == "EASY",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

```{r}
event_log_df[event_log_df$difficulty == "MODERATE",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$difficulty == "MODERATE",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

```{r}
event_log_df[event_log_df$difficulty == "CHALLENGING",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$difficulty == "CHALLENGING",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

```{r}
event_log_df[event_log_df$priority == "LOW",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$priority == "LOW",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces =10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

```{r}
event_log_df[event_log_df$priority == "MEDIUM",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$priority == "MEDIUM",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

```{r}
event_log_df[event_log_df$priority == "HIGH",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$priority == "HIGH",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

```{r}
event_log_df[event_log_df$priority == "URGENT",] %>% activity_presence()
```

```{r}
tmp <- event_log_df[event_log_df$priority == "URGENT",]  %>% filter_trace_frequency(percentage = 0.85)
```

```{r}
# Total time (minutes) each task spent locked for mapping.
# na.rm = TRUE keeps the known durations when a lock has no `complete`
# timestamp; without it the whole task total becomes NA and is then
# overwritten with 0 by the NA replacement further down.
totalmapping <- tmp[tmp$action == "LOCKED_FOR_MAPPING", ] %>%
  group_by(taskId) %>%
  summarise(mapping_duration = sum(action_duration, na.rm = TRUE))
totalmapping
```

```{r}
# Total time (minutes) each task spent locked for validation.
totalvalidation <- tmp[tmp$action == "LOCKED_FOR_VALIDATION", ] %>%
  group_by(taskId) %>%
  summarise(validation_duration = sum(action_duration, na.rm = TRUE))
totalvalidation
```

```{r}
# Full outer join: tasks that have only one of the two lock types are kept,
# and the duration missing on the other side is set to zero.
mappingvalidation <- merge(totalmapping, totalvalidation, by = "taskId", all = TRUE)
mappingvalidation[is.na(mappingvalidation)] <- 0
mappingvalidation
```

```{r}
# Percentage of tasks whose total validation time exceeds their mapping time.
mappingvalidation <- transform(
  mappingvalidation,
  morevalidation = ifelse(mapping_duration < validation_duration, 1, 0)
)
mean(mappingvalidation$morevalidation) * 100
```

```{r}
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
tmp %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

In the frequency process map, nodes represent the absolute number of activity instance executions and edges represent the absolute number of times source and target activities were executed directly following each other. To provide a clear process map, the event log was first filtered using filter_trace_frequency(). Setting percentage = 0.85 selects at least 85% of the cases, starting with those that have the highest frequency.
```{r}
tmp <- event_log_df %>% filter_trace_frequency(percentage = 0.85)
tmp %>%  process_map(frequency("absolute"))
```

trace_explorer() with the argument n_traces = 10 shows the 10 most frequent traces in the event log.
```{r}
event_log_df %>%    trace_explorer(n_traces = 10, show_labels = FALSE, coverage_labels = c("relative"))
```

In the temporal process map, the value of nodes and edges represent the median duration in days of activities and waiting times.
```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "days"))
```

```{r}
tmp1 <- tmp
tmp1 %>%  process_map(performance(median, "mins"))
```

# Organisation {#organisation}
Composition of the total number of contributors of the analysed projects according to their mapping level.

```{r}
# Number of distinct contributors (actionBy) per mapping level, plus each
# level's share of the summed counts as a percentage with one decimal.
mappingLevel <- event_log_df %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel[["percentage"]] <- with(
  mappingLevel,
  round(count / sum(count) * 100, digits = 1)
)
mappingLevel
```

Breakdown of status execution frequency per mapping level.

```{r}
# Count log rows per action (rows) and mapping level (columns).
data_pivot <- dcast(
  event_log_df,
  action ~ mappingLevel,
  value.var = "taskId",
  fun.aggregate = length
)

# Row total over the three mapping levels, then each level's share of that
# total as a percentage rounded to one decimal.
data_pivot[["sum"]] <- data_pivot[["ADVANCED"]] + data_pivot[["BEGINNER"]] + data_pivot[["INTERMEDIATE"]]
data_pivot[["ADVANCEDper"]] <- round(data_pivot[["ADVANCED"]] / data_pivot[["sum"]] * 100, digits = 1)
data_pivot[["BEGINNERper"]] <- round(data_pivot[["BEGINNER"]] / data_pivot[["sum"]] * 100, digits = 1)
data_pivot[["INTERMEDIATEper"]] <- round(data_pivot[["INTERMEDIATE"]] / data_pivot[["sum"]] * 100, digits = 1)

# Render the percentage columns as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Distinct usernames per project and mapping level, restricted to
# mapping-side actions.
# n_distinct() (dplyr, attached above) replaces uniqueN(), which is a
# data.table function and is not attached by any library() call in this
# notebook.
data_pivot <- dcast(
  event_log_df[event_log_df$action %in% c("LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING", "SPLIT", "BADIMAGERY"), ],
  projectId ~ mappingLevel,
  value.var = "username",
  fun.aggregate = n_distinct
)

# Share (%) of each mapping level in the summed per-project contributor
# counts, printed in the order BEGINNER, INTERMEDIATE, ADVANCED.
total_contributors <- sum(data_pivot$ADVANCED) + sum(data_pivot$INTERMEDIATE) + sum(data_pivot$BEGINNER)
sum(data_pivot$BEGINNER) / total_contributors * 100
sum(data_pivot$INTERMEDIATE) / total_contributors * 100
sum(data_pivot$ADVANCED) / total_contributors * 100
```

```{r}
# Distinct usernames per project and mapping level, restricted to
# validation-side actions.
# n_distinct() (dplyr, attached above) replaces uniqueN(), which is a
# data.table function and is not attached by any library() call in this
# notebook.
data_pivot <- dcast(
  event_log_df[event_log_df$action %in% c("LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED", "AUTO_UNLOCKED_FOR_VALIDATION"), ],
  projectId ~ mappingLevel,
  value.var = "username",
  fun.aggregate = n_distinct
)

# Share (%) of each mapping level in the summed per-project contributor
# counts, printed in the order BEGINNER, INTERMEDIATE, ADVANCED.
total_contributors <- sum(data_pivot$ADVANCED) + sum(data_pivot$INTERMEDIATE) + sum(data_pivot$BEGINNER)
sum(data_pivot$BEGINNER) / total_contributors * 100
sum(data_pivot$INTERMEDIATE) / total_contributors * 100
sum(data_pivot$ADVANCED) / total_contributors * 100
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project difficulty is "EASY".
# The filter uses %in% rather than ==: with ==, a missing difficulty puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$difficulty %in% "EASY", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose difficulty is "EASY"; the next chunk
# reuses `easy` as well.
# %in% (not ==) keeps rows with a missing difficulty out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
easy <- event_log_df[event_log_df$difficulty %in% "EASY", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  easy[
    easy$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `easy` subset built in the preceding chunk, keep only the four
# validation-related actions listed below and count the distinct usernames
# per project and mapping level.
data_pivot <- dcast(
  easy[
    easy$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project difficulty is "EASY", plus each level's percentage of the
# summed counts (1 decimal).
# %in% (not ==) is used so rows with a missing difficulty are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$difficulty %in% "EASY", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project difficulty is "MODERATE".
# The filter uses %in% rather than ==: with ==, a missing difficulty puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$difficulty %in% "MODERATE", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose difficulty is "MODERATE"; the next
# chunk reuses `moderate` as well.
# %in% (not ==) keeps rows with a missing difficulty out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
moderate <- event_log_df[event_log_df$difficulty %in% "MODERATE", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  moderate[
    moderate$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `moderate` subset built in the preceding chunk, keep only the
# four validation-related actions listed below and count the distinct
# usernames per project and mapping level.
data_pivot <- dcast(
  moderate[
    moderate$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project difficulty is "MODERATE", plus each level's percentage of the
# summed counts (1 decimal).
# %in% (not ==) is used so rows with a missing difficulty are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$difficulty %in% "MODERATE", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project difficulty is "CHALLENGING".
# The filter uses %in% rather than ==: with ==, a missing difficulty puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$difficulty %in% "CHALLENGING", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose difficulty is "CHALLENGING"; the next
# chunk reuses `challenging` as well.
# %in% (not ==) keeps rows with a missing difficulty out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
challenging <- event_log_df[event_log_df$difficulty %in% "CHALLENGING", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  challenging[
    challenging$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `challenging` subset built in the preceding chunk, keep only the
# four validation-related actions listed below and count the distinct
# usernames per project and mapping level.
data_pivot <- dcast(
  challenging[
    challenging$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project difficulty is "CHALLENGING", plus each level's percentage of
# the summed counts (1 decimal).
# %in% (not ==) is used so rows with a missing difficulty are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$difficulty %in% "CHALLENGING", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project priority is "LOW".
# The filter uses %in% rather than ==: with ==, a missing priority puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$priority %in% "LOW", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose priority is "LOW"; the next chunk
# reuses `low` as well.
# %in% (not ==) keeps rows with a missing priority out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
low <- event_log_df[event_log_df$priority %in% "LOW", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  low[
    low$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `low` subset built in the preceding chunk, keep only the four
# validation-related actions listed below and count the distinct usernames
# per project and mapping level.
data_pivot <- dcast(
  low[
    low$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project priority is "LOW", plus each level's percentage of the summed
# counts (1 decimal).
# %in% (not ==) is used so rows with a missing priority are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$priority %in% "LOW", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project priority is "MEDIUM".
# The filter uses %in% rather than ==: with ==, a missing priority puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$priority %in% "MEDIUM", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose priority is "MEDIUM"; the next chunk
# reuses `medium` as well.
# %in% (not ==) keeps rows with a missing priority out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
medium <- event_log_df[event_log_df$priority %in% "MEDIUM", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  medium[
    medium$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `medium` subset built in the preceding chunk, keep only the four
# validation-related actions listed below and count the distinct usernames
# per project and mapping level.
data_pivot <- dcast(
  medium[
    medium$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project priority is "MEDIUM", plus each level's percentage of the
# summed counts (1 decimal).
# %in% (not ==) is used so rows with a missing priority are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$priority %in% "MEDIUM", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project priority is "HIGH".
# The filter uses %in% rather than ==: with ==, a missing priority puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$priority %in% "HIGH", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose priority is "HIGH"; the next chunk
# reuses `high` as well.
# %in% (not ==) keeps rows with a missing priority out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
high <- event_log_df[event_log_df$priority %in% "HIGH", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  high[
    high$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `high` subset built in the preceding chunk, keep only the four
# validation-related actions listed below and count the distinct usernames
# per project and mapping level.
data_pivot <- dcast(
  high[
    high$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project priority is "HIGH", plus each level's percentage of the summed
# counts (1 decimal).
# %in% (not ==) is used so rows with a missing priority are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$priority %in% "HIGH", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```

```{r}
# Share of each action's log rows per contributor mapping level, restricted to
# log rows whose project priority is "URGENT".
# The filter uses %in% rather than ==: with ==, a missing priority puts NA in
# the logical index and the subset returns an all-NA row for it.
data_pivot <- dcast(
  event_log_df[event_log_df$priority %in% "URGENT", ],
  action ~ mappingLevel,
  fun.aggregate = length,
  value.var = "taskId"
)
# Row total over the three mapping levels, then each level's percentage of it.
data_pivot$sum <- data_pivot$ADVANCED + data_pivot$BEGINNER +
  data_pivot$INTERMEDIATE
data_pivot$ADVANCEDper <- round(data_pivot$ADVANCED / data_pivot$sum * 100, 1)
data_pivot$BEGINNERper <- round(data_pivot$BEGINNER / data_pivot$sum * 100, 1)
data_pivot$INTERMEDIATEper <- round(
  data_pivot$INTERMEDIATE / data_pivot$sum * 100, 1
)
# Render the percentages as a table shaded from white (0) to dark green (100).
data_pivot[c("action", "ADVANCEDper", "BEGINNERper", "INTERMEDIATEper")] %>%
  gt() %>%
  data_color(
    columns = 2:4,
    colors = col_numeric(palette = c("white", "darkgreen"), domain = c(0, 100))
  )
```

```{r}
# Subset of the log for projects whose priority is "URGENT"; the next chunk
# reuses `urgent` as well.
# %in% (not ==) keeps rows with a missing priority out of the subset; ==
# would put NA in the logical index and return an all-NA row for each of them.
urgent <- event_log_df[event_log_df$priority %in% "URGENT", ]
# Keep only the five mapping-related actions listed below, then count the
# distinct usernames per project and mapping level.
data_pivot <- dcast(
  urgent[
    urgent$action %in% c(
      "LOCKED_FOR_MAPPING", "MAPPED", "AUTO_UNLOCKED_FOR_MAPPING",
      "SPLIT", "BADIMAGERY"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Within the `urgent` subset built in the preceding chunk, keep only the four
# validation-related actions listed below and count the distinct usernames
# per project and mapping level.
data_pivot <- dcast(
  urgent[
    urgent$action %in% c(
      "LOCKED_FOR_VALIDATION", "INVALIDATED", "VALIDATED",
      "AUTO_UNLOCKED_FOR_VALIDATION"
    ),
  ],
  projectId ~ mappingLevel,
  fun.aggregate = uniqueN,
  value.var = "username"
)
# Percentage of the per-project counts, summed over projects, held by
# BEGINNER, INTERMEDIATE and ADVANCED contributors (printed in that order).
sum(data_pivot[["BEGINNER"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["INTERMEDIATE"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
sum(data_pivot[["ADVANCED"]]) / sum(
  data_pivot[["ADVANCED"]],
  data_pivot[["INTERMEDIATE"]],
  data_pivot[["BEGINNER"]]
) * 100
```

```{r}
# Number of distinct contributors (`actionBy`) per mapping level among log rows
# whose project priority is "URGENT", plus each level's percentage of the
# summed counts (1 decimal).
# %in% (not ==) is used so rows with a missing priority are dropped; with ==
# they would come back as all-NA rows and form a spurious NA group.
mappingLevel <- event_log_df[event_log_df$priority %in% "URGENT", ] %>%
  group_by(mappingLevel) %>%
  summarise(count = n_distinct(actionBy))
mappingLevel$percentage <- round(
  mappingLevel$count / sum(mappingLevel$count) * 100, 1
)
mappingLevel
```
